return (ret < 0) ? -1 : op.u.getmemlist.num_pfns;
}
-static int send_pgupdates(int xc_handle, mmu_update_t *updates, int nr_updates)
-{
- int ret = -1;
- privcmd_hypercall_t hypercall;
-
- hypercall.op = __HYPERVISOR_mmu_update;
- hypercall.arg[0] = (unsigned long)updates;
- hypercall.arg[1] = (unsigned long)nr_updates;
-
- if ( mlock(updates, nr_updates * sizeof(*updates)) != 0 )
- goto out1;
-
- if ( do_xen_hypercall(xc_handle, &hypercall) < 0 )
- goto out2;
-
- ret = 0;
-
- out2: (void)munlock(updates, nr_updates * sizeof(*updates));
- out1: return ret;
-}
-
/* Read the kernel header, extracting the image size and load address. */
static int read_kernel_header(gzFile gfd, long dom_size,
unsigned long *load_addr)
l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
unsigned long *page_array = NULL;
- mmu_update_t *pgt_update_arr = NULL, *pgt_updates = NULL;
int alloc_index, num_pt_pages;
unsigned long l2tab;
unsigned long l1tab;
- unsigned long num_pgt_updates = 0;
unsigned long count, pt_start, i, j;
unsigned long initrd_addr = 0, initrd_len = 0;
start_info_t *start_info;
shared_info_t *shared_info;
unsigned long ksize;
+ mmu_t *mmu = NULL;
int pm_handle;
memset(builddomain, 0, sizeof(*builddomain));
if ( (pm_handle = init_pfn_mapper()) < 0 )
goto error_out;
- pgt_updates = malloc((tot_pages + 1) * sizeof(mmu_update_t));
- page_array = malloc(tot_pages * sizeof(unsigned long));
- pgt_update_arr = pgt_updates;
- if ( (pgt_update_arr == NULL) || (page_array == NULL) )
+ if ( (page_array = malloc(tot_pages * sizeof(unsigned long))) == NULL )
{
PERROR("Could not allocate memory");
goto error_out;
alloc_index--;
builddomain->ctxt.pt_base = l2tab;
+ if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
+ goto error_out;
+
/* Initialise the page tables. */
if ( (vl2tab = map_pfn_writeable(pm_handle, l2tab >> PAGE_SHIFT)) == NULL )
goto error_out;
*vl1e &= ~_PAGE_RW;
vl1e++;
- pgt_updates->ptr =
- (page_array[count] << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
- pgt_updates->val = count;
- pgt_updates++;
- num_pgt_updates++;
+ if ( add_mmu_update(xc_handle, mmu,
+ (page_array[count] << PAGE_SHIFT) |
+ MMU_MACHPHYS_UPDATE, count) )
+ goto error_out;
}
unmap_pfn(pm_handle, vl1tab);
unmap_pfn(pm_handle, vl2tab);
* Pin down l2tab addr as page dir page - causes hypervisor to provide
* correct protection for the page
*/
- pgt_updates->ptr = l2tab | MMU_EXTENDED_COMMAND;
- pgt_updates->val = MMUEXT_PIN_L2_TABLE;
- pgt_updates++;
- num_pgt_updates++;
+ if ( add_mmu_update(xc_handle, mmu,
+ l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE) )
+ goto error_out;
*virt_startinfo_addr =
virt_load_addr + ((alloc_index-1) << PAGE_SHIFT);
start_info->flags = 0;
strncpy(start_info->cmd_line, cmdline, MAX_CMD_LEN);
start_info->cmd_line[MAX_CMD_LEN-1] = '\0';
-
unmap_pfn(pm_handle, start_info);
/* shared_info page starts its life empty. */
unmap_pfn(pm_handle, shared_info);
/* Send the page update requests down to the hypervisor. */
- if ( send_pgupdates(xc_handle, pgt_update_arr, num_pgt_updates) < 0 )
+ if ( finish_mmu_updates(xc_handle, mmu) )
goto error_out;
+ free(mmu);
+ (void)close_pfn_mapper(pm_handle);
free(page_array);
- free(pgt_update_arr);
return 0;
error_out:
+ if ( mmu != NULL )
+ free(mmu);
if ( pm_handle >= 0 )
(void)close_pfn_mapper(pm_handle);
- if ( page_array )
+ if ( page_array != NULL )
free(page_array);
- if ( pgt_update_arr )
- free(pgt_update_arr);
return -1;
}
return (ret < 0) ? -1 : op.u.getmemlist.num_pfns;
}
-#define MAX_MMU_UPDATES 1024
-
-static int flush_mmu_updates(int xc_handle,
- mmu_update_t *mmu_updates,
- int *mmu_update_idx)
-{
- int err = 0;
- privcmd_hypercall_t hypercall;
-
- if ( *mmu_update_idx == 0 )
- return 0;
-
- hypercall.op = __HYPERVISOR_mmu_update;
- hypercall.arg[0] = (unsigned long)mmu_updates;
- hypercall.arg[1] = (unsigned long)*mmu_update_idx;
-
- if ( mlock(mmu_updates, sizeof(mmu_updates)) != 0 )
- {
- PERROR("Could not lock pagetable update array");
- err = 1;
- goto out;
- }
-
- if ( do_xen_hypercall(xc_handle, &hypercall) < 0 )
- {
- ERROR("Failure when submitting mmu updates");
- err = 1;
- }
-
- *mmu_update_idx = 0;
-
- (void)munlock(mmu_updates, sizeof(mmu_updates));
-
- out:
- return err;
-}
-
-static int add_mmu_update(int xc_handle,
- mmu_update_t *mmu_updates,
- int *mmu_update_idx,
- unsigned long ptr,
- unsigned long val)
-{
- mmu_updates[*mmu_update_idx].ptr = ptr;
- mmu_updates[*mmu_update_idx].val = val;
- if ( ++*mmu_update_idx == MAX_MMU_UPDATES )
- return flush_mmu_updates(xc_handle, mmu_updates, mmu_update_idx);
- return 0;
-}
-
static int checked_read(gzFile fd, void *buf, size_t count)
{
int rc;
int fd;
gzFile gfd;
- mmu_update_t mmu_updates[MAX_MMU_UPDATES];
- int mmu_update_idx = 0;
+ mmu_t *mmu = NULL;
int pm_handle = -1;
goto out;
}
+ if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
+ {
+ ERROR("Could not initialise for MMU updates");
+ goto out;
+ }
+
verbose_printf("Reloading memory pages: 0%%");
/*
unmap_pfn(pm_handle, ppage);
- if ( add_mmu_update(xc_handle, mmu_updates, &mmu_update_idx,
+ if ( add_mmu_update(xc_handle, mmu,
(mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, i) )
goto out;
}
{
if ( pfn_type[i] == L1TAB )
{
- if ( add_mmu_update(xc_handle, mmu_updates, &mmu_update_idx,
+ if ( add_mmu_update(xc_handle, mmu,
(pfn_to_mfn_table[i]<<PAGE_SHIFT) |
MMU_EXTENDED_COMMAND,
MMUEXT_PIN_L1_TABLE) )
}
else if ( pfn_type[i] == L2TAB )
{
- if ( add_mmu_update(xc_handle, mmu_updates, &mmu_update_idx,
+ if ( add_mmu_update(xc_handle, mmu,
(pfn_to_mfn_table[i]<<PAGE_SHIFT) |
MMU_EXTENDED_COMMAND,
MMUEXT_PIN_L2_TABLE) )
}
}
-
- if ( flush_mmu_updates(xc_handle, mmu_updates, &mmu_update_idx) )
+ if ( finish_mmu_updates(xc_handle, mmu) )
goto out;
verbose_printf("\b\b\b\b100%%\nMemory reloaded.\n");
rc = do_dom0_op(xc_handle, &op);
out:
+ if ( mmu != NULL )
+ free(mmu);
+
if ( rc != 0 )
{
if ( dom != 0 )
return (ret < 0) ? -1 : op.u.getmemlist.num_pfns;
}
-static int send_pgupdates(int xc_handle, mmu_update_t *updates, int nr_updates)
-{
- int ret = -1;
- privcmd_hypercall_t hypercall;
-
- hypercall.op = __HYPERVISOR_mmu_update;
- hypercall.arg[0] = (unsigned long)updates;
- hypercall.arg[1] = (unsigned long)nr_updates;
-
- if ( mlock(updates, nr_updates * sizeof(*updates)) != 0 )
- goto out1;
-
- if ( do_xen_hypercall(xc_handle, &hypercall) < 0 )
- goto out2;
-
- ret = 0;
-
- out2: (void)munlock(updates, nr_updates * sizeof(*updates));
- out1: return ret;
-}
-
static int setup_guestos(int xc_handle,
u64 dom,
gzFile kernel_gfd,
l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
unsigned long *page_array = NULL;
- mmu_update_t *pgt_update_arr = NULL, *pgt_updates = NULL;
int alloc_index, num_pt_pages;
unsigned long l2tab;
unsigned long l1tab;
- unsigned long num_pgt_updates = 0;
unsigned long count, pt_start;
unsigned long symtab_addr = 0, symtab_len = 0;
start_info_t *start_info;
shared_info_t *shared_info;
unsigned long ksize;
+ mmu_t *mmu = NULL;
int pm_handle;
memset(builddomain, 0, sizeof(*builddomain));
if ( (pm_handle = init_pfn_mapper()) < 0 )
goto error_out;
- pgt_updates = malloc((tot_pages + 1) * sizeof(mmu_update_t));
- page_array = malloc(tot_pages * sizeof(unsigned long));
- pgt_update_arr = pgt_updates;
- if ( (pgt_update_arr == NULL) || (page_array == NULL) )
+ if ( (page_array = malloc(tot_pages * sizeof(unsigned long))) == NULL )
{
PERROR("Could not allocate memory");
goto error_out;
l2tab = page_array[alloc_index] << PAGE_SHIFT;
alloc_index--;
builddomain->ctxt.pt_base = l2tab;
-
+
+ if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
+ goto error_out;
+
/* Initialise the page tables. */
if ( (vl2tab = map_pfn_writeable(pm_handle, l2tab >> PAGE_SHIFT)) == NULL )
goto error_out;
*vl1e &= ~_PAGE_RW;
vl1e++;
- pgt_updates->ptr =
- (page_array[count] << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
- pgt_updates->val = count;
- pgt_updates++;
- num_pgt_updates++;
+ if ( add_mmu_update(xc_handle, mmu,
+ (page_array[count] << PAGE_SHIFT) |
+ MMU_MACHPHYS_UPDATE, count) )
+ goto error_out;
}
unmap_pfn(pm_handle, vl1tab);
unmap_pfn(pm_handle, vl2tab);
* Pin down l2tab addr as page dir page - causes hypervisor to provide
* correct protection for the page
*/
- pgt_updates->ptr = l2tab | MMU_EXTENDED_COMMAND;
- pgt_updates->val = MMUEXT_PIN_L2_TABLE;
- pgt_updates++;
- num_pgt_updates++;
+ if ( add_mmu_update(xc_handle, mmu,
+ l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE) )
+ goto error_out;
*virt_startinfo_addr =
*virt_load_addr + ((alloc_index-1) << PAGE_SHIFT);
start_info->flags = 0;
strncpy(start_info->cmd_line, cmdline, MAX_CMD_LEN);
start_info->cmd_line[MAX_CMD_LEN-1] = '\0';
-
unmap_pfn(pm_handle, start_info);
/* shared_info page starts its life empty. */
unmap_pfn(pm_handle, shared_info);
/* Send the page update requests down to the hypervisor. */
- if ( send_pgupdates(xc_handle, pgt_update_arr, num_pgt_updates) < 0 )
+ if ( finish_mmu_updates(xc_handle, mmu) )
goto error_out;
+ free(mmu);
+ (void)close_pfn_mapper(pm_handle);
free(page_array);
- free(pgt_update_arr);
return 0;
error_out:
+ if ( mmu != NULL )
+ free(mmu);
if ( pm_handle >= 0 )
(void)close_pfn_mapper(pm_handle);
if ( page_array == NULL )
free(page_array);
- if ( pgt_update_arr == NULL )
- free(pgt_update_arr);
return -1;
}
{
(void)munmap(vaddr, PAGE_SIZE);
}
+
+/* Slots 0 and 1 of every batch are reserved for the SET_SUBJECTDOM pair. */
+#define FIRST_MMU_UPDATE 2
+
+/*
+ * Submit all queued MMU updates to Xen in a single mmu_update hypercall.
+ * Slots 0/1 are filled with a MMUEXT_SET_SUBJECTDOM_L/_H request pair that
+ * encodes the 64-bit mmu->subject domid across the upper 16 bits of each
+ * request's ptr and val, so the hypervisor applies the batch on behalf of
+ * that domain.  The update array is mlock()ed around the hypercall because
+ * the hypervisor reads it from user memory.  Returns 0 on success, 1 on
+ * failure; the queue index is reset in either case so the mmu_t is reusable.
+ */
+static int flush_mmu_updates(int xc_handle, mmu_t *mmu)
+{
+ int err = 0;
+ privcmd_hypercall_t hypercall;
+
+ /* Nothing queued beyond the two reserved slots: nothing to do. */
+ if ( mmu->idx == FIRST_MMU_UPDATE )
+ return 0;
+
+ /* The first two requests set the correct subject domain. */
+ mmu->updates[0].val = (unsigned long)(mmu->subject<<16) & ~0xFFFFUL;
+ mmu->updates[0].ptr = (unsigned long)(mmu->subject<< 0) & ~0xFFFFUL;
+ mmu->updates[1].val = (unsigned long)(mmu->subject>>16) & ~0xFFFFUL;
+ mmu->updates[1].ptr = (unsigned long)(mmu->subject>>32) & ~0xFFFFUL;
+ mmu->updates[0].ptr |= MMU_EXTENDED_COMMAND;
+ mmu->updates[0].val |= MMUEXT_SET_SUBJECTDOM_L;
+ mmu->updates[1].ptr |= MMU_EXTENDED_COMMAND;
+ mmu->updates[1].val |= MMUEXT_SET_SUBJECTDOM_H;
+
+ hypercall.op = __HYPERVISOR_mmu_update;
+ hypercall.arg[0] = (unsigned long)mmu->updates;
+ hypercall.arg[1] = (unsigned long)mmu->idx;
+
+ if ( mlock(mmu->updates, sizeof(mmu->updates)) != 0 )
+ {
+ PERROR("Could not lock pagetable update array");
+ err = 1;
+ goto out;
+ }
+
+ if ( do_xen_hypercall(xc_handle, &hypercall) < 0 )
+ {
+ ERROR("Failure when submitting mmu updates");
+ err = 1;
+ }
+
+ /* Reset for the next batch even on failure. */
+ mmu->idx = FIRST_MMU_UPDATE;
+
+ (void)munlock(mmu->updates, sizeof(mmu->updates));
+
+ out:
+ return err;
+}
+
+/*
+ * Allocate and initialise an MMU-update batch targeting subject domain
+ * 'dom'.  Returns NULL on allocation failure.  'xc_handle' is unused here
+ * but kept for symmetry with the other mmu_t operations.  The caller
+ * releases the returned object with free().
+ */
+mmu_t *init_mmu_updates(int xc_handle, domid_t dom)
+{
+ mmu_t *mmu = malloc(sizeof(mmu_t));
+ if ( mmu == NULL )
+ return mmu;
+ mmu->idx = FIRST_MMU_UPDATE;
+ mmu->subject = dom;
+ return mmu;
+}
+
+/*
+ * Queue a single (ptr, val) update request.  When the batch buffer fills
+ * up, the queue is flushed to the hypervisor immediately.  Returns 0 on
+ * success, non-zero if that implicit flush fails.
+ */
+int add_mmu_update(int xc_handle, mmu_t *mmu,
+ unsigned long ptr, unsigned long val)
+{
+ mmu->updates[mmu->idx].ptr = ptr;
+ mmu->updates[mmu->idx].val = val;
+ if ( ++mmu->idx == MAX_MMU_UPDATES )
+ return flush_mmu_updates(xc_handle, mmu);
+ return 0;
+}
+
+/* Flush any still-queued updates; call once when batching is complete.
+ * Returns 0 on success, non-zero on failure. */
+int finish_mmu_updates(int xc_handle, mmu_t *mmu)
+{
+ return flush_mmu_updates(xc_handle, mmu);
+}
void *map_pfn_readonly(int pm_handle, unsigned long pfn);
void unmap_pfn(int pm_handle, void *vaddr);
+/*
+ * MMU updates.
+ *
+ * Batches mmu_update_t requests so they can be submitted to the
+ * hypervisor in one hypercall, on behalf of a chosen subject domain.
+ */
+#define MAX_MMU_UPDATES 1024
+typedef struct {
+ mmu_update_t updates[MAX_MMU_UPDATES]; /* queued requests; [0]/[1] reserved for the subject-domain pair */
+ int idx; /* index of the next free slot in updates[] */
+ domid_t subject; /* domain on whose behalf updates are applied */
+} mmu_t;
+mmu_t *init_mmu_updates(int xc_handle, domid_t dom);
+int add_mmu_update(int xc_handle, mmu_t *mmu,
+ unsigned long ptr, unsigned long val);
+int finish_mmu_updates(int xc_handle, mmu_t *mmu);
+
#endif /* __XC_PRIVATE_H__ */
struct task_struct *p2 = NULL;
event_channel_t *chn1, *chn2;
int port2;
- unsigned long cpu_mask;
+ unsigned long cpu_mask = 0;
long rc = 0;
again:
if ( chn2[port2].remote_dom != p1 )
BUG();
- chn2[port2].state = ECS_ZOMBIE;
+ chn2[port2].state = ECS_DISCONNECTED;
chn2[port2].remote_dom = NULL;
chn2[port2].remote_port = 0xFFFF;
- cpu_mask = set_event_disc(p1, port1);
cpu_mask |= set_event_disc(p2, port2);
- guest_event_notify(cpu_mask);
}
chn1[port1].state = ECS_FREE;
chn1[port1].remote_dom = NULL;
chn1[port1].remote_port = 0xFFFF;
+ cpu_mask |= set_event_disc(p1, port1);
+ guest_event_notify(cpu_mask);
+
out:
spin_unlock(&p1->event_channel_lock);
put_task_struct(p1);
case ECS_FREE:
status->status = EVTCHNSTAT_closed;
break;
- case ECS_ZOMBIE:
+ case ECS_DISCONNECTED:
status->status = EVTCHNSTAT_disconnected;
break;
case ECS_CONNECTED:
/******************************************************************************
* memory.c
*
- * Copyright (c) 2002 K A Fraser
+ * Copyright (c) 2002-2004 K A Fraser
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
static int alloc_l2_table(struct pfn_info *page);
static int alloc_l1_table(struct pfn_info *page);
-static int get_page_from_pagenr(unsigned long page_nr);
+static int get_page_from_pagenr(unsigned long page_nr, int check_level);
static int get_page_and_type_from_pagenr(unsigned long page_nr,
- unsigned int type);
+ unsigned int type,
+ int check_level);
+#define CHECK_STRICT 0 /* Subject domain must own the page */
+#define CHECK_ANYDOM 1 /* Any domain may own the page (if subject is priv.) */
static void free_l2_table(struct pfn_info *page);
static void free_l1_table(struct pfn_info *page);
static struct {
#define DOP_FLUSH_TLB (1<<0) /* Flush the TLB. */
#define DOP_RELOAD_LDT (1<<1) /* Reload the LDT shadow mapping. */
- unsigned long flags;
- unsigned long cr0;
-} deferred_op[NR_CPUS] __cacheline_aligned;
+ unsigned long deferred_ops;
+ unsigned long cr0;
+ domid_t subject_id;
+ struct task_struct *subject_p;
+} percpu_info[NR_CPUS] __cacheline_aligned;
/*
* init_frametable:
unsigned long page_index;
unsigned long flags;
- memset(deferred_op, 0, sizeof(deferred_op));
+ memset(percpu_info, 0, sizeof(percpu_info));
max_page = nr_pages;
frame_table_size = nr_pages * sizeof(struct pfn_info);
}
/* Dispose of the (now possibly invalid) mappings from the TLB. */
- deferred_op[p->processor].flags |= DOP_FLUSH_TLB | DOP_RELOAD_LDT;
+ percpu_info[p->processor].deferred_ops |= DOP_FLUSH_TLB | DOP_RELOAD_LDT;
}
}
-/* Domain 0 is allowed to build page tables on others' behalf. */
-static inline int dom0_get_page(struct pfn_info *page)
-{
- unsigned long x, nx, y = page->count_and_flags;
-
- do {
- x = y;
- nx = x + 1;
- if ( unlikely((x & PGC_count_mask) == 0) ||
- unlikely((nx & PGC_count_mask) == 0) )
- return 0;
- }
- while ( unlikely((y = cmpxchg(&page->count_and_flags, x, nx)) != x) );
-
- return 1;
-}
-
-
-static int get_page_from_pagenr(unsigned long page_nr)
+static int get_page_from_pagenr(unsigned long page_nr, int check_level)
{
+ struct task_struct *p = current;
struct pfn_info *page = &frame_table[page_nr];
+ unsigned long y, x, nx;
- if ( unlikely(page_nr >= max_page) )
+ if ( unlikely(!pfn_is_ram(page_nr)) )
{
- MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
+ MEM_LOG("Pfn %08lx is not RAM", page_nr);
return 0;
}
- if ( unlikely(!get_page(page, current)) &&
- unlikely((current->domain != 0) || !dom0_get_page(page)) )
+ /* Find the correct subject domain. */
+ if ( unlikely(percpu_info[p->processor].subject_p != NULL) )
+ p = percpu_info[p->processor].subject_p;
+
+ /* Demote ANYDOM to STRICT if subject domain is not privileged. */
+ if ( check_level == CHECK_ANYDOM && !IS_PRIV(p) )
+ check_level = CHECK_STRICT;
+
+ switch ( check_level )
{
- MEM_LOG("Could not get page reference for pfn %08lx\n", page_nr);
- return 0;
+ case CHECK_STRICT:
+ if ( unlikely(!get_page(page, p)) )
+ {
+ MEM_LOG("Could not get page ref for pfn %08lx\n", page_nr);
+ return 0;
+ }
+ break;
+ case CHECK_ANYDOM:
+ y = page->count_and_flags;
+ do {
+ x = y;
+ nx = x + 1;
+ if ( unlikely((x & PGC_count_mask) == 0) ||
+ unlikely((nx & PGC_count_mask) == 0) )
+ {
+ MEM_LOG("Could not get page ref for pfn %08lx\n", page_nr);
+ return 0;
+ }
+ }
+ while ( unlikely((y = cmpxchg(&page->count_and_flags, x, nx)) != x) );
+ break;
}
return 1;
static int get_page_and_type_from_pagenr(unsigned long page_nr,
- unsigned int type)
+ unsigned int type,
+ int check_level)
{
struct pfn_info *page = &frame_table[page_nr];
- if ( unlikely(!get_page_from_pagenr(page_nr)) )
+ if ( unlikely(!get_page_from_pagenr(page_nr, check_level)) )
return 0;
if ( unlikely(!get_page_type(page, type)) )
if ( (l2_pgentry_val(l2e) >> PAGE_SHIFT) != pfn )
{
/* Make sure the mapped frame belongs to the correct domain. */
- if ( unlikely(!get_page_from_pagenr(l2_pgentry_to_pagenr(l2e))) )
+ if ( unlikely(!get_page_from_pagenr(l2_pgentry_to_pagenr(l2e),
+ CHECK_STRICT)) )
return 0;
/*
static int get_page_from_l1e(l1_pgentry_t l1e)
{
- ASSERT(l1_pgentry_val(l1e) & _PAGE_PRESENT);
+ unsigned long l1v = l1_pgentry_val(l1e);
+ unsigned long pfn = l1_pgentry_to_pagenr(l1e);
- if ( unlikely((l1_pgentry_val(l1e) & (_PAGE_GLOBAL|_PAGE_PAT))) )
+ if ( !(l1v & _PAGE_PRESENT) )
+ return 1;
+
+ if ( unlikely(l1v & (_PAGE_GLOBAL|_PAGE_PAT)) )
{
- MEM_LOG("Bad L1 page type settings %04lx",
- l1_pgentry_val(l1e) & (_PAGE_GLOBAL|_PAGE_PAT));
+ MEM_LOG("Bad L1 type settings %04lx", l1v & (_PAGE_GLOBAL|_PAGE_PAT));
return 0;
}
- if ( l1_pgentry_val(l1e) & _PAGE_RW )
+ if ( unlikely(!pfn_is_ram(pfn)) )
+ {
+ if ( IS_PRIV(current) )
+ return 1;
+ MEM_LOG("Non-privileged attempt to map I/O space %08lx", pfn);
+ return 0;
+ }
+
+ if ( l1v & _PAGE_RW )
{
if ( unlikely(!get_page_and_type_from_pagenr(
- l1_pgentry_to_pagenr(l1e), PGT_writeable_page)) )
+ pfn, PGT_writeable_page, CHECK_ANYDOM)) )
return 0;
set_bit(_PGC_tlb_flush_on_type_change,
- &frame_table[l1_pgentry_to_pagenr(l1e)].count_and_flags);
+ &frame_table[pfn].count_and_flags);
return 1;
}
- return get_page_from_pagenr(l1_pgentry_to_pagenr(l1e));
+ return get_page_from_pagenr(pfn, CHECK_ANYDOM);
}
/* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */
static int get_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
{
- ASSERT(l2_pgentry_val(l2e) & _PAGE_PRESENT);
+ if ( !(l2_pgentry_val(l2e) & _PAGE_PRESENT) )
+ return 1;
if ( unlikely((l2_pgentry_val(l2e) & (_PAGE_GLOBAL|_PAGE_PSE))) )
{
}
if ( unlikely(!get_page_and_type_from_pagenr(
- l2_pgentry_to_pagenr(l2e), PGT_l1_page_table)) )
+ l2_pgentry_to_pagenr(l2e), PGT_l1_page_table, CHECK_STRICT)) )
return get_linear_pagetable(l2e, pfn);
return 1;
static void put_page_from_l1e(l1_pgentry_t l1e)
{
struct pfn_info *page = &frame_table[l1_pgentry_to_pagenr(l1e)];
+ unsigned long l1v = l1_pgentry_val(l1e);
- ASSERT(l1_pgentry_val(l1e) & _PAGE_PRESENT);
+ if ( !(l1v & _PAGE_PRESENT) || !pfn_is_ram(l1v >> PAGE_SHIFT) )
+ return;
- if ( l1_pgentry_val(l1e) & _PAGE_RW )
+ if ( l1v & _PAGE_RW )
{
put_page_and_type(page);
}
*/
static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
{
- ASSERT(l2_pgentry_val(l2e) & _PAGE_PRESENT);
-
if ( (l2_pgentry_val(l2e) & _PAGE_PRESENT) &&
((l2_pgentry_val(l2e) >> PAGE_SHIFT) != pfn) )
put_page_and_type(&frame_table[l2_pgentry_to_pagenr(l2e)]);
static int alloc_l2_table(struct pfn_info *page)
{
unsigned long page_nr = page - frame_table;
- l2_pgentry_t *pl2e, l2e;
+ l2_pgentry_t *pl2e;
int i;
pl2e = map_domain_mem(page_nr << PAGE_SHIFT);
for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- {
- l2e = pl2e[i];
-
- if ( !(l2_pgentry_val(l2e) & _PAGE_PRESENT) )
- continue;
-
- if ( unlikely(!get_page_from_l2e(l2e, page_nr)) )
+ if ( unlikely(!get_page_from_l2e(pl2e[i], page_nr)) )
goto fail;
- }
/* Now we add our private high mappings. */
memcpy(&pl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
fail:
while ( i-- > 0 )
- {
- l2e = pl2e[i];
- if ( l2_pgentry_val(l2e) & _PAGE_PRESENT )
- put_page_from_l2e(l2e, page_nr);
- }
+ put_page_from_l2e(pl2e[i], page_nr);
unmap_domain_mem(pl2e);
return 0;
static int alloc_l1_table(struct pfn_info *page)
{
unsigned long page_nr = page - frame_table;
- l1_pgentry_t *pl1e, l1e;
+ l1_pgentry_t *pl1e;
int i;
pl1e = map_domain_mem(page_nr << PAGE_SHIFT);
for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
- {
- l1e = pl1e[i];
-
- if ( !(l1_pgentry_val(l1e) & _PAGE_PRESENT) )
- continue;
-
- if ( unlikely(!get_page_from_l1e(l1e)) )
+ if ( unlikely(!get_page_from_l1e(pl1e[i])) )
goto fail;
- }
- /* Make sure we unmap the right page! */
unmap_domain_mem(pl1e);
return 1;
fail:
while ( i-- > 0 )
- {
- l1e = pl1e[i];
- if ( !(l1_pgentry_val(l1e) & _PAGE_PRESENT) )
- continue;
- put_page_from_l1e(l1e);
- }
+ put_page_from_l1e(pl1e[i]);
unmap_domain_mem(pl1e);
return 0;
static void free_l2_table(struct pfn_info *page)
{
unsigned long page_nr = page - frame_table;
- l2_pgentry_t *pl2e, l2e;
+ l2_pgentry_t *pl2e;
int i;
pl2e = map_domain_mem(page_nr << PAGE_SHIFT);
for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- {
- l2e = pl2e[i];
- if ( (l2_pgentry_val(l2e) & _PAGE_PRESENT) &&
- unlikely((l2_pgentry_val(l2e) >> PAGE_SHIFT) != page_nr) )
- put_page_and_type(&frame_table[l2_pgentry_to_pagenr(l2e)]);
- }
+ put_page_from_l2e(pl2e[i], page_nr);
unmap_domain_mem(pl2e);
}
static void free_l1_table(struct pfn_info *page)
{
unsigned long page_nr = page - frame_table;
- l1_pgentry_t *pl1e, l1e;
+ l1_pgentry_t *pl1e;
int i;
pl1e = map_domain_mem(page_nr << PAGE_SHIFT);
for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
- {
- l1e = pl1e[i];
- if ( !(l1_pgentry_val(l1e) & _PAGE_PRESENT) )
- continue;
- put_page_from_l1e(l1e);
- }
+ put_page_from_l1e(pl1e[i]);
unmap_domain_mem(pl1e);
}
return 0;
}
- if ( l2_pgentry_val(ol2e) & _PAGE_PRESENT )
- put_page_from_l2e(ol2e, pfn);
-
+ put_page_from_l2e(ol2e, pfn);
return 1;
}
if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
return 0;
- if ( l2_pgentry_val(ol2e) & _PAGE_PRESENT )
- put_page_from_l2e(ol2e, pfn);
-
+ put_page_from_l2e(ol2e, pfn);
return 1;
}
return 0;
}
- if ( l1_pgentry_val(ol1e) & _PAGE_PRESENT )
- put_page_from_l1e(ol1e);
-
+ put_page_from_l1e(ol1e);
return 1;
}
if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
return 0;
- if ( l1_pgentry_val(ol1e) & _PAGE_PRESENT )
- put_page_from_l1e(ol1e);
-
+ put_page_from_l1e(ol1e);
return 1;
}
unsigned long pfn = ptr >> PAGE_SHIFT;
struct pfn_info *page = &frame_table[pfn];
- /* 'ptr' must be in range except where it isn't a machine address. */
- if ( (pfn >= max_page) && (cmd != MMUEXT_SET_LDT) )
- {
- MEM_LOG("Ptr out of range for extended MMU command");
- return 1;
- }
-
switch ( cmd )
{
case MMUEXT_PIN_L1_TABLE:
case MMUEXT_PIN_L2_TABLE:
- okay = get_page_and_type_from_pagenr(pfn,
- (cmd == MMUEXT_PIN_L2_TABLE) ?
- PGT_l2_page_table :
- PGT_l1_page_table);
+ okay = get_page_and_type_from_pagenr(
+ pfn, (cmd == MMUEXT_PIN_L2_TABLE) ? PGT_l2_page_table :
+ PGT_l1_page_table,
+ CHECK_STRICT);
if ( unlikely(!okay) )
{
MEM_LOG("Error while pinning pfn %08lx", pfn);
+ put_page(page);
break;
}
break;
case MMUEXT_UNPIN_TABLE:
- if ( unlikely(!(okay = get_page_from_pagenr(pfn))) )
+ if ( unlikely(!(okay = get_page_from_pagenr(pfn, CHECK_STRICT))) )
{
MEM_LOG("Page %08lx bad domain (dom=%p)",
ptr, page->u.domain);
break;
case MMUEXT_NEW_BASEPTR:
- okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table);
+ okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table,
+ CHECK_STRICT);
if ( likely(okay) )
{
put_page_and_type(&frame_table[pagetable_val(current->mm.pagetable)
>> PAGE_SHIFT]);
current->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
invalidate_shadow_ldt();
- deferred_op[cpu].flags |= DOP_FLUSH_TLB;
+ percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB;
}
else
{
break;
case MMUEXT_TLB_FLUSH:
- deferred_op[cpu].flags |= DOP_FLUSH_TLB;
+ percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB;
break;
case MMUEXT_INVLPG:
current->mm.ldt_base = ptr;
current->mm.ldt_ents = ents;
load_LDT(current);
- deferred_op[cpu].flags &= ~DOP_RELOAD_LDT;
+ percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT;
if ( ents != 0 )
- deferred_op[cpu].flags |= DOP_RELOAD_LDT;
+ percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT;
}
break;
}
+ case MMUEXT_SET_SUBJECTDOM_L:
+ percpu_info[cpu].subject_id = (domid_t)((ptr&~0xFFFF)|(val>>16));
+ break;
+
+ case MMUEXT_SET_SUBJECTDOM_H:
+ percpu_info[cpu].subject_id |= (domid_t)((ptr&~0xFFFF)|(val>>16))<<32;
+ if ( !IS_PRIV(current) )
+ {
+ MEM_LOG("Dom %llu has no privilege to set subject domain",
+ current->domain);
+ okay = 0;
+ }
+ else
+ {
+ if ( percpu_info[cpu].subject_p != NULL )
+ put_task_struct(percpu_info[cpu].subject_p);
+ percpu_info[cpu].subject_p = find_domain_by_id(
+ percpu_info[cpu].subject_id);
+ if ( percpu_info[cpu].subject_p == NULL )
+ {
+ MEM_LOG("Unknown domain '%llu'", percpu_info[cpu].subject_id);
+ okay = 0;
+ }
+ }
+ break;
+
default:
MEM_LOG("Invalid extended pt command 0x%08lx", val & MMUEXT_CMD_MASK);
okay = 0;
int do_mmu_update(mmu_update_t *ureqs, int count)
{
mmu_update_t req;
- unsigned long va = 0, flags, pfn, prev_pfn = 0;
+ unsigned long va = 0, deferred_ops, pfn, prev_pfn = 0;
struct pfn_info *page;
int rc = 0, okay = 1, i, cpu = smp_processor_id();
unsigned int cmd;
* MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
*/
case MMU_NORMAL_PT_UPDATE:
- page = &frame_table[pfn];
-
- if ( unlikely(pfn >= max_page) )
- {
- MEM_LOG("Page out of range (%08lx > %08lx)", pfn, max_page);
- break;
- }
-
- if ( unlikely(!get_page(page, current)) &&
- ((current->domain != 0) || !dom0_get_page(page)) )
+ if ( unlikely(!get_page_from_pagenr(pfn, CHECK_STRICT)) )
{
MEM_LOG("Could not get page for normal update");
break;
prev_pfn = pfn;
}
+ page = &frame_table[pfn];
switch ( (page->type_and_flags & PGT_type_mask) )
{
case PGT_l1_page_table:
break;
- case MMU_UNCHECKED_PT_UPDATE:
- req.ptr &= ~(sizeof(l1_pgentry_t) - 1);
- if ( likely(IS_PRIV(current)) )
- {
- if ( likely(prev_pfn == pfn) )
- {
- va = (va & PAGE_MASK) | (req.ptr & ~PAGE_MASK);
- }
- else
- {
- if ( prev_pfn != 0 )
- unmap_domain_mem((void *)va);
- va = (unsigned long)map_domain_mem(req.ptr);
- prev_pfn = pfn;
- }
- *(unsigned long *)va = req.val;
- okay = 1;
- }
- else
- {
- MEM_LOG("Bad unchecked update attempt");
- }
- break;
-
case MMU_MACHPHYS_UPDATE:
- page = &frame_table[pfn];
- if ( unlikely(pfn >= max_page) )
+ if ( unlikely(!get_page_from_pagenr(pfn, CHECK_STRICT)) )
{
- MEM_LOG("Page out of range (%08lx > %08lx)", pfn, max_page);
- }
- else if ( likely(get_page(page, current)) ||
- ((current->domain == 0) && dom0_get_page(page)) )
- {
- machine_to_phys_mapping[pfn] = req.val;
- okay = 1;
- put_page(page);
+ MEM_LOG("Could not get page for mach->phys update");
+ break;
}
+
+ machine_to_phys_mapping[pfn] = req.val;
+ okay = 1;
+ put_page(&frame_table[pfn]);
break;
/*
if ( prev_pfn != 0 )
unmap_domain_mem((void *)va);
- flags = deferred_op[cpu].flags;
- deferred_op[cpu].flags = 0;
+ deferred_ops = percpu_info[cpu].deferred_ops;
+ percpu_info[cpu].deferred_ops = 0;
- if ( flags & DOP_FLUSH_TLB )
+ if ( deferred_ops & DOP_FLUSH_TLB )
write_cr3_counted(pagetable_val(current->mm.pagetable));
- if ( flags & DOP_RELOAD_LDT )
+ if ( deferred_ops & DOP_RELOAD_LDT )
(void)map_ldt_shadow_page(0);
+ if ( unlikely(percpu_info[cpu].subject_p != NULL) )
+ {
+ put_task_struct(percpu_info[cpu].subject_p);
+ percpu_info[cpu].subject_p = NULL;
+ }
+
return rc;
}
int do_update_va_mapping(unsigned long page_nr,
unsigned long val,
- unsigned long caller_flags)
+ unsigned long flags)
{
struct task_struct *p = current;
int err = 0;
unsigned int cpu = p->processor;
- unsigned long defer_flags;
+ unsigned long deferred_ops;
if ( unlikely(page_nr >= (HYPERVISOR_VIRT_START >> PAGE_SHIFT)) )
return -EINVAL;
mk_l1_pgentry(val))) )
err = -EINVAL;
- defer_flags = deferred_op[cpu].flags;
- deferred_op[cpu].flags = 0;
+ deferred_ops = percpu_info[cpu].deferred_ops;
+ percpu_info[cpu].deferred_ops = 0;
- if ( unlikely(defer_flags & DOP_FLUSH_TLB) ||
- unlikely(caller_flags & UVMF_FLUSH_TLB) )
+ if ( unlikely(deferred_ops & DOP_FLUSH_TLB) ||
+ unlikely(flags & UVMF_FLUSH_TLB) )
write_cr3_counted(pagetable_val(p->mm.pagetable));
- else if ( unlikely(caller_flags & UVMF_INVLPG) )
+ else if ( unlikely(flags & UVMF_INVLPG) )
__flush_tlb_one(page_nr << PAGE_SHIFT);
- if ( unlikely(defer_flags & DOP_RELOAD_LDT) )
+ if ( unlikely(deferred_ops & DOP_RELOAD_LDT) )
(void)map_ldt_shadow_page(0);
return err;
* - check for pages with corrupt ref-count
* Interrupts are disabled completely. Use with care.
*/
-void audit_all_pages (u_char key, void *dev_id, struct pt_regs *regs)
+void audit_all_pages(u_char key, void *dev_id, struct pt_regs *regs)
{
unsigned long i, j, k;
unsigned long ref_count;
/* walk the frame table */
for ( i = 0; i < max_page; i++ )
{
-
/* check for zombies */
if ( ((frame_table[i].count_and_flags & PGC_count_mask) != 0) &&
((frame_table[i].count_and_flags & PGC_zombie) != 0) )
#ifndef _I386_PAGE_H
#define _I386_PAGE_H
-
-#ifndef __ASSEMBLY__
#define BUG() do { \
printk("BUG at %s:%d\n", __FILE__, __LINE__); \
__asm__ __volatile__("ud2"); \
} while (0)
-#endif /* __ASSEMBLY__ */
-
#define L1_PAGETABLE_SHIFT 12
#define L2_PAGETABLE_SHIFT 22
#define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT))
#define VALID_PAGE(page) ((page - frame_table) < max_mapnr)
+/*
+ * NB. We don't currently track I/O holes in the physical RAM space.
+ * For now we guess that I/O devices will be mapped in the first 1MB
+ * (e.g., VGA buffers) or beyond the end of physical RAM.
+ */
+#define pfn_is_ram(_pfn) (((_pfn) > 0x100) && ((_pfn) < max_page))
+
/* High table entries are reserved by the hypervisor. */
#define DOMAIN_ENTRIES_PER_L2_PAGETABLE \
(HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT)
* which shifts the least bits out.
*/
/* A normal page-table update request. */
-#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */
-/* DOM0 can make entirely unchecked updates which do not affect refcnts. */
-#define MMU_UNCHECKED_PT_UPDATE 1 /* unchecked '*ptr = val'. ptr is MA. */
+#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */
/* Update an entry in the machine->physical mapping table. */
-#define MMU_MACHPHYS_UPDATE 2 /* ptr = MA of frame to modify entry for */
+#define MMU_MACHPHYS_UPDATE 2 /* ptr = MA of frame to modify entry for */
/* An extended command. */
-#define MMU_EXTENDED_COMMAND 3 /* least 8 bits of val demux further */
+#define MMU_EXTENDED_COMMAND 3 /* least 8 bits of val demux further */
/* Extended commands: */
-#define MMUEXT_PIN_L1_TABLE 0 /* ptr = MA of frame to pin */
-#define MMUEXT_PIN_L2_TABLE 1 /* ptr = MA of frame to pin */
-#define MMUEXT_PIN_L3_TABLE 2 /* ptr = MA of frame to pin */
-#define MMUEXT_PIN_L4_TABLE 3 /* ptr = MA of frame to pin */
-#define MMUEXT_UNPIN_TABLE 4 /* ptr = MA of frame to unpin */
-#define MMUEXT_NEW_BASEPTR 5 /* ptr = MA of new pagetable base */
-#define MMUEXT_TLB_FLUSH 6 /* ptr = NULL */
-#define MMUEXT_INVLPG 7 /* ptr = NULL ; val = VA to invalidate */
-#define MMUEXT_SET_LDT 8 /* ptr = VA of table; val = # entries */
+#define MMUEXT_PIN_L1_TABLE 0 /* ptr = MA of frame to pin */
+#define MMUEXT_PIN_L2_TABLE 1 /* ptr = MA of frame to pin */
+#define MMUEXT_PIN_L3_TABLE 2 /* ptr = MA of frame to pin */
+#define MMUEXT_PIN_L4_TABLE 3 /* ptr = MA of frame to pin */
+#define MMUEXT_UNPIN_TABLE 4 /* ptr = MA of frame to unpin */
+#define MMUEXT_NEW_BASEPTR 5 /* ptr = MA of new pagetable base */
+#define MMUEXT_TLB_FLUSH 6 /* ptr = NULL */
+#define MMUEXT_INVLPG 7 /* ptr = NULL ; val = VA to invalidate */
+#define MMUEXT_SET_LDT 8 /* ptr = VA of table; val = # entries */
+/* NB. MMUEXT_SET_SUBJECTDOM must consist of *_L followed immediately by *_H */
+#define MMUEXT_SET_SUBJECTDOM_L 9 /* (ptr[31:16],val[31:16]) = dom[31:0] */
+#define MMUEXT_SET_SUBJECTDOM_H 10 /* (ptr[31:16],val[31:16]) = dom[63:32] */
#define MMUEXT_CMD_MASK 255
#define MMUEXT_CMD_SHIFT 8
{
struct task_struct *remote_dom;
u16 remote_port;
-#define ECS_FREE 0 /* Available for use. */
-#define ECS_ZOMBIE 1 /* Connection is closed. Remote is disconnected. */
-#define ECS_CONNECTED 2 /* Connected to remote end. */
+#define ECS_FREE 0 /* Available for use. */
+#define ECS_DISCONNECTED 1 /* Connection is closed. Remote is disconnected. */
+#define ECS_CONNECTED 2 /* Connected to remote end. */
u16 state;
} event_channel_t;
}
pte = pte_offset(pmd, vaddr);
- if ( pte_io(*pte) || (pgprot_val(prot) & _PAGE_IO) )
- queue_unchecked_mmu_update(pte, phys | pgprot_val(prot));
- else
- queue_l1_entry_update(pte, phys | pgprot_val(prot));
+ queue_l1_entry_update(pte, phys | pgprot_val(prot));
/*
* It's enough to flush this one mapping.
#if defined(CONFIG_XENO_PRIV)
-#define direct_set_pte(_p, _v) queue_unchecked_mmu_update((_p), (_v).pte_low)
+/* These hacky macros avoid phys->machine translations. */
#define __direct_pte(x) ((pte_t) { (x) } )
#define __direct_mk_pte(page_nr,pgprot) \
__direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
#define direct_mk_pte_phys(physpage, pgprot) \
__direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
-
static inline void direct_remap_area_pte(pte_t *pte,
unsigned long address,
unsigned long size,
printk("direct_remap_area_pte: page already exists\n");
BUG();
}
- direct_set_pte(pte, pte_mkio(direct_mk_pte_phys(machine_addr, prot)));
+ set_pte(pte, pte_mkio(direct_mk_pte_phys(machine_addr, prot)));
address += PAGE_SIZE;
machine_addr += PAGE_SIZE;
pte++;
void queue_set_ldt(unsigned long ptr, unsigned long bytes);
#define MMU_UPDATE_DEBUG 0
-#define queue_unchecked_mmu_update(_p,_v) queue_l1_entry_update( \
- (pte_t *)((unsigned long)(_p)|MMU_UNCHECKED_PT_UPDATE),(_v))
-
#if MMU_UPDATE_DEBUG > 0
typedef struct {
void *ptr;
struct page *page = pte_page(pte);
#if defined(CONFIG_XENO_PRIV)
if (pte_io(pte)) {
- queue_unchecked_mmu_update(ptep, 0);
+ queue_l1_entry_update(ptep, 0);
continue;
}
#endif